{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3e5b1c52-2333-403f-8ffa-f1baaf5d36e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f3d1f39f-7686-425a-8516-6948be5307d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def printlines(lines):\n",
    "    for line in lines:\n",
    "        print(line,end='')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "462a6d8b-7ebc-4c49-b528-304302560a04",
   "metadata": {},
   "outputs": [],
   "source": [
    "def printFile(path):\n",
    "    with open(path) as f:\n",
    "        lines = f.readlines()\n",
    "        printlines(lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "72facc3e-3390-4874-81c0-e6dd7709b97a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a,b,c,d,message\n",
      "1,2,3,4,hello\n",
      "5,6,7,8,world\n",
      "9,10,11,12,foo"
     ]
    }
   ],
   "source": [
    "printFile('examples/ex1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "9eaa4577-d2d0-4a50-8b65-e7023275d2fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('examples/ex1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "701f15e4-f461-420d-9036-7ca5f72cc420",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f4f32bce-4483-49fe-8410-3949cd8f1b0f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1,2,3,4,hello\n",
      "5,6,7,8,world\n",
      "9,10,11,12,foo"
     ]
    }
   ],
   "source": [
    "printFile('examples/ex2.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "69985c42-b26d-48dd-8187-25f497fb2e9d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0   1   2   3      4\n",
       "0  1   2   3   4  hello\n",
       "1  5   6   7   8  world\n",
       "2  9  10  11  12    foo"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('examples/ex2.csv',header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "1feab4fe-4cb7-4530-8dd0-fa97b6a1230c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('examples/ex2.csv',names=['a','b','c','d','message'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1973b0c2-2f73-4044-8ad5-d7dec066eb42",
   "metadata": {},
   "outputs": [],
   "source": [
    "names=['a','b','c','d','message']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "4928fb5a-88aa-4d88-9d6f-534c596454c7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>message</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>hello</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>world</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>foo</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         a   b   c   d\n",
       "message               \n",
       "hello    1   2   3   4\n",
       "world    5   6   7   8\n",
       "foo      9  10  11  12"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('examples/ex2.csv',names=names,index_col=\"message\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "48bf5823-aa65-45e0-976e-5fe0f4191146",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "key1,key2,value1,value2\n",
      "one,a,1,2\n",
      "one,b,3,4\n",
      "one,c,5,6\n",
      "one,d,7,8\n",
      "two,a,9,10\n",
      "two,b,11,12\n",
      "two,c,13,14\n",
      "two,d,15,16\n"
     ]
    }
   ],
   "source": [
    "printFile('examples/csv_mindex.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "d358f9c3-16cb-4da1-a4fa-f900d5304830",
   "metadata": {},
   "outputs": [],
   "source": [
    "parsed=pd.read_csv('examples/csv_mindex.csv',index_col=['key1','key2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "28ea9c19-bf5a-4828-b690-7515caeb5373",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>value1</th>\n",
       "      <th>value2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">one</th>\n",
       "      <th>a</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">two</th>\n",
       "      <th>a</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           value1  value2\n",
       "key1 key2                \n",
       "one  a          1       2\n",
       "     b          3       4\n",
       "     c          5       6\n",
       "     d          7       8\n",
       "two  a          9      10\n",
       "     b         11      12\n",
       "     c         13      14\n",
       "     d         15      16"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "parsed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "5f3bb883-2c16-4b16-880b-cb5726a09376",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            A         B         C\n",
      "aaa -0.264438 -1.026059 -0.619500\n",
      "bbb  0.927272  0.302904 -0.032399\n",
      "ccc -0.264273 -0.386314 -0.217601\n",
      "ddd -0.871858 -0.348382  1.100491\n"
     ]
    }
   ],
   "source": [
    "printFile('examples/ex3.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "c6134ff2-dfa4-4aed-a48e-7e22be545f56",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = pd.read_csv('examples/ex3.txt',sep='\\s+')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "21c0cc75-8db6-4697-a2e5-dda72304a430",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>aaa</th>\n",
       "      <td>-0.264438</td>\n",
       "      <td>-1.026059</td>\n",
       "      <td>-0.619500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bbb</th>\n",
       "      <td>0.927272</td>\n",
       "      <td>0.302904</td>\n",
       "      <td>-0.032399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ccc</th>\n",
       "      <td>-0.264273</td>\n",
       "      <td>-0.386314</td>\n",
       "      <td>-0.217601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ddd</th>\n",
       "      <td>-0.871858</td>\n",
       "      <td>-0.348382</td>\n",
       "      <td>1.100491</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            A         B         C\n",
       "aaa -0.264438 -1.026059 -0.619500\n",
       "bbb  0.927272  0.302904 -0.032399\n",
       "ccc -0.264273 -0.386314 -0.217601\n",
       "ddd -0.871858 -0.348382  1.100491"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "dc42bea1-11bb-47bc-81f3-1cc247a748af",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# hey!\n",
      "a,b,c,d,message\n",
      "# just wanted to make things more difficult for you\n",
      "# who reads CSV files with computers, anyway?\n",
      "1,2,3,4,hello\n",
      "5,6,7,8,world\n",
      "9,10,11,12,foo\n"
     ]
    }
   ],
   "source": [
    "printFile('examples/ex4.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "276d85da-0d67-40d1-a0a3-82816ec5d65b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('examples/ex4.csv',skiprows=[0,2,3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "1291fcbb-9b0f-498c-99e2-316fa56483bd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "something,a,b,c,d,message\n",
      "one,1,2,3,4,NA\n",
      "two,5,6,,8,world\n",
      "three,9,10,11,12,foo"
     ]
    }
   ],
   "source": [
    "printFile('examples/ex5.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "1b90aeaa-7587-4058-a74b-8eb53c56a605",
   "metadata": {},
   "outputs": [],
   "source": [
    "result=pd.read_csv('examples/ex5.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "390fd73e-aa37-4c5e-a46b-3309e36d5dd4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11.0</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b     c   d message\n",
       "0       one  1   2   3.0   4     NaN\n",
       "1       two  5   6   NaN   8   world\n",
       "2     three  9  10  11.0  12     foo"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "693673c4-ff44-49d8-8e0e-76865560cf38",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   something      a      b      c      d  message\n",
       "0      False  False  False  False  False     True\n",
       "1      False  False  False   True  False    False\n",
       "2      False  False  False  False  False    False"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.isna(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "50673dbb-040c-4f8b-90a5-7626765d4307",
   "metadata": {},
   "outputs": [],
   "source": [
    "result=pd.read_csv('examples/ex5.csv',na_values=['NULL'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "ec5b6a2c-b225-4ed3-806e-109350576aba",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11.0</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b     c   d message\n",
       "0       one  1   2   3.0   4     NaN\n",
       "1       two  5   6   NaN   8   world\n",
       "2     three  9  10  11.0  12     foo"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "ea555a1b-da1d-4d1c-ab52-b98b42f626fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "result2 = pd.read_csv('examples/ex5.csv',keep_default_na=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "573e7b0c-3c90-45d5-a18e-e46c377238a3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td></td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b   c   d message\n",
       "0       one  1   2   3   4      NA\n",
       "1       two  5   6       8   world\n",
       "2     three  9  10  11  12     foo"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "48953303-de57-4704-b9b0-548d0281655b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   something      a      b      c      d  message\n",
       "0      False  False  False  False  False    False\n",
       "1      False  False  False  False  False    False\n",
       "2      False  False  False  False  False    False"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result2.isna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "8a02c0da-8fcd-4b64-b026-610d3faf0c97",
   "metadata": {},
   "outputs": [],
   "source": [
    "result3 = pd.read_csv('examples/ex5.csv',keep_default_na=False,na_values=['NA'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "cc9adc84-a26a-4c0f-b1c0-43b243cf9e1c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td></td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b   c   d message\n",
       "0       one  1   2   3   4     NaN\n",
       "1       two  5   6       8   world\n",
       "2     three  9  10  11  12     foo"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "2dfb3e49-ec5a-45fd-9e1a-73073b8dff2b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   something      a      b      c      d  message\n",
       "0      False  False  False  False  False     True\n",
       "1      False  False  False  False  False    False\n",
       "2      False  False  False  False  False    False"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result3.isna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "9cea7cb2-897f-41e1-856d-c7df1ab198be",
   "metadata": {},
   "outputs": [],
   "source": [
    "sentinels={'message':['foo','NA'],'something':['two']}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "952b3ce1-40c1-4813-8587-ffb62f117cb7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td></td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b   c   d message\n",
       "0       one  1   2   3   4     NaN\n",
       "1       NaN  5   6       8   world\n",
       "2     three  9  10  11  12     NaN"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('examples/ex5.csv',na_values=sentinels,keep_default_na=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "1f68a111-1aca-48a9-b16d-20c55d780474",
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.display.max_rows=10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "5880e682-2521-4ed6-bcbb-b10eee6ea3ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "result=pd.read_csv('examples/ex6.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "1f99f1e6-2ace-4d3c-bc22-d351abf49e72",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.467976</td>\n",
       "      <td>-0.038649</td>\n",
       "      <td>-0.295344</td>\n",
       "      <td>-1.824726</td>\n",
       "      <td>L</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.358893</td>\n",
       "      <td>1.404453</td>\n",
       "      <td>0.704965</td>\n",
       "      <td>-0.200638</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.501840</td>\n",
       "      <td>0.659254</td>\n",
       "      <td>-0.421691</td>\n",
       "      <td>-0.057688</td>\n",
       "      <td>G</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.204886</td>\n",
       "      <td>1.074134</td>\n",
       "      <td>1.388361</td>\n",
       "      <td>-0.982404</td>\n",
       "      <td>R</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.354628</td>\n",
       "      <td>-0.133116</td>\n",
       "      <td>0.283763</td>\n",
       "      <td>-0.837063</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9995</th>\n",
       "      <td>2.311896</td>\n",
       "      <td>-0.417070</td>\n",
       "      <td>-1.409599</td>\n",
       "      <td>-0.515821</td>\n",
       "      <td>L</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9996</th>\n",
       "      <td>-0.479893</td>\n",
       "      <td>-0.650419</td>\n",
       "      <td>0.745152</td>\n",
       "      <td>-0.646038</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9997</th>\n",
       "      <td>0.523331</td>\n",
       "      <td>0.787112</td>\n",
       "      <td>0.486066</td>\n",
       "      <td>1.093156</td>\n",
       "      <td>K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9998</th>\n",
       "      <td>-0.362559</td>\n",
       "      <td>0.598894</td>\n",
       "      <td>-1.843201</td>\n",
       "      <td>0.887292</td>\n",
       "      <td>G</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999</th>\n",
       "      <td>-0.096376</td>\n",
       "      <td>-1.012999</td>\n",
       "      <td>-0.657431</td>\n",
       "      <td>-0.573315</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           one       two     three      four key\n",
       "0     0.467976 -0.038649 -0.295344 -1.824726   L\n",
       "1    -0.358893  1.404453  0.704965 -0.200638   B\n",
       "2    -0.501840  0.659254 -0.421691 -0.057688   G\n",
       "3     0.204886  1.074134  1.388361 -0.982404   R\n",
       "4     0.354628 -0.133116  0.283763 -0.837063   Q\n",
       "...        ...       ...       ...       ...  ..\n",
       "9995  2.311896 -0.417070 -1.409599 -0.515821   L\n",
       "9996 -0.479893 -0.650419  0.745152 -0.646038   E\n",
       "9997  0.523331  0.787112  0.486066  1.093156   K\n",
       "9998 -0.362559  0.598894 -1.843201  0.887292   G\n",
       "9999 -0.096376 -1.012999 -0.657431 -0.573315   0\n",
       "\n",
       "[10000 rows x 5 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "4f03caa3-997f-4c64-b4aa-60a86487405e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.467976</td>\n",
       "      <td>-0.038649</td>\n",
       "      <td>-0.295344</td>\n",
       "      <td>-1.824726</td>\n",
       "      <td>L</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.358893</td>\n",
       "      <td>1.404453</td>\n",
       "      <td>0.704965</td>\n",
       "      <td>-0.200638</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.501840</td>\n",
       "      <td>0.659254</td>\n",
       "      <td>-0.421691</td>\n",
       "      <td>-0.057688</td>\n",
       "      <td>G</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.204886</td>\n",
       "      <td>1.074134</td>\n",
       "      <td>1.388361</td>\n",
       "      <td>-0.982404</td>\n",
       "      <td>R</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.354628</td>\n",
       "      <td>-0.133116</td>\n",
       "      <td>0.283763</td>\n",
       "      <td>-0.837063</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        one       two     three      four key\n",
       "0  0.467976 -0.038649 -0.295344 -1.824726   L\n",
       "1 -0.358893  1.404453  0.704965 -0.200638   B\n",
       "2 -0.501840  0.659254 -0.421691 -0.057688   G\n",
       "3  0.204886  1.074134  1.388361 -0.982404   R\n",
       "4  0.354628 -0.133116  0.283763 -0.837063   Q"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('examples/ex6.csv',nrows=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "ad20bf96-fa23-4e20-9d61-512f88143e51",
   "metadata": {},
   "outputs": [],
   "source": [
    "chunker=pd.read_csv('examples/ex6.csv',chunksize=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "eb47f3d4-3d00-4132-b678-45b1f71028f8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.io.parsers.readers.TextFileReader"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(chunker)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "efc89b0f-ada9-4f16-bda0-1047a4667c91",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "chunker=pd.read_csv('examples/ex6.csv',chunksize=1000)\n",
    "tot = pd.Series([],dtype='int64')\n",
    "for piece in chunker:\n",
    "    tot = tot.add(piece['key'].value_counts(),fill_value=0)\n",
    "tot=tot.sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "e95af24a-3b40-4158-af7a-a6b6b0fe3535",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "key\n",
       "E    368.0\n",
       "X    364.0\n",
       "L    346.0\n",
       "O    343.0\n",
       "Q    340.0\n",
       "M    338.0\n",
       "J    337.0\n",
       "F    335.0\n",
       "K    334.0\n",
       "H    330.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tot[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "196beee8-30c0-488e-9be2-82284a738481",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('examples/ex5.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "8a89c69e-434a-46f1-9444-232e45173be7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11.0</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b     c   d message\n",
       "0       one  1   2   3.0   4     NaN\n",
       "1       two  5   6   NaN   8   world\n",
       "2     three  9  10  11.0  12     foo"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "d82c3877-ef56-4678-964e-9bd0ca377eaf",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_csv('examples/out.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "f064361d-637c-4088-8194-dab62a52adc7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ",something,a,b,c,d,message\n",
      "0,one,1,2,3.0,4,\n",
      "1,two,5,6,,8,world\n",
      "2,three,9,10,11.0,12,foo\n"
     ]
    }
   ],
   "source": [
    "printFile('examples/out.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "7fbc7f33-0975-4e1e-bcb7-0caa0801ffd5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "cae1808f-5975-456c-b628-2e88899a3452",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "|something|a|b|c|d|message\n",
      "0|one|1|2|3.0|4|\n",
      "1|two|5|6||8|world\n",
      "2|three|9|10|11.0|12|foo\n"
     ]
    }
   ],
   "source": [
    "data.to_csv(sys.stdout,sep='|')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "bf238970-2804-4f4a-8375-7eb0000b62b4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ",something,a,b,c,d,message\n",
      "0,one,1,2,3.0,4,NULL\n",
      "1,two,5,6,NULL,8,world\n",
      "2,three,9,10,11.0,12,foo\n"
     ]
    }
   ],
   "source": [
    "data.to_csv(sys.stdout,na_rep='NULL')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "a29e54d0-1aa2-46fd-9e37-a84c2c174eaa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one,1,2,3.0,4,\n",
      "two,5,6,,8,world\n",
      "three,9,10,11.0,12,foo\n"
     ]
    }
   ],
   "source": [
    "data.to_csv(sys.stdout,index=False,header=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "bf30c4a6-77b0-43e3-9db7-2f69179875ae",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a,b,c\n",
      "1,2,3.0\n",
      "5,6,\n",
      "9,10,11.0\n"
     ]
    }
   ],
   "source": [
    "data.to_csv(sys.stdout,index=False,columns=['a','b','c'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "dcb90ecc-06a4-49c4-b90c-efe2fa425529",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\"a\",\"b\",\"c\"\n",
      "\"1\",\"2\",\"3\"\n",
      "\"1\",\"2\",\"3\"\n"
     ]
    }
   ],
   "source": [
    "printFile('examples/ex7.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "978cc393-997a-421c-b438-288c234fcea5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "ffffe122-dc32-4568-80e8-faaef1d94ab6",
   "metadata": {},
   "outputs": [],
   "source": [
    "f=open('examples/ex7.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "6ebb6ab5-b8cf-479b-9041-ed8b097dcda6",
   "metadata": {},
   "outputs": [],
   "source": [
    "reader = csv.reader(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "231cec64-b82e-4a55-94ec-6e41c39d2679",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['a', 'b', 'c']\n",
      "['1', '2', '3']\n",
      "['1', '2', '3']\n"
     ]
    }
   ],
   "source": [
    "for line in reader:\n",
    "    print(line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "1f839254-8c45-49d2-8940-28aa4217cf01",
   "metadata": {},
   "outputs": [],
   "source": [
    "f.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "3ce04275-b187-4c4c-be66-4f7d99913caf",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('examples/ex7.csv') as f:\n",
    "    lines = list(csv.reader(f))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "0c4b8e35-1b05-44e0-a137-3d46c6f0aea9",
   "metadata": {},
   "outputs": [],
   "source": [
    "header,values=lines[0],lines[1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "010b2d78-7354-4eaa-b702-786fe6603db3",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_dict={h:v for h,v in zip(header,zip(*values))}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "7098f5ba-565b-4746-a72e-241b3901cf4a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "556f2f0c-b872-406a-a2e5-c5a7dd8aa717",
   "metadata": {},
   "outputs": [],
   "source": [
    "class my_dialect(csv.Dialect):\n",
    "    lineterminator='\\n'\n",
    "    delimiter=';'\n",
    "    quotechar='\"'\n",
    "    quoting=csv.QUOTE_MINIMAL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "e55ca0a5-a8a2-4fa4-81f4-b005c83910da",
   "metadata": {},
   "outputs": [],
   "source": [
    "# reader = csv.reader(f,dialect=my_dialect)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "3cb536e7-755e-41dd-9663-e6132f572366",
   "metadata": {},
   "outputs": [],
   "source": [
    "# reader =csv.reader(f,delimiter='|')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "a4d270de-dbd4-4fb3-9c26-06435c037d0c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# with open('mydata.csv','w') as f:\n",
    "#     writer = csv.writer(f,dialect=my_dialect)\n",
    "#     writer.writerow(('one','two','three'))\n",
    "#     writer.writerow(('1','2','3'))\n",
    "#     writer.writerow(('4','5','6'))\n",
    "#     writer.writerow(('7','8','9'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "60ad9c64-7c6f-431e-804b-293ea632873a",
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = \"\"\"\n",
    "{\"name\": \"Wes\",\n",
    " \"cities_lived\": [\"Akron\", \"Nashville\", \"New York\", \"San Francisco\"],\n",
    " \"pet\": null,\n",
    " \"siblings\": [{\"name\": \"Scott\", \"age\": 34, \"hobbies\": [\"guitars\", \"soccer\"]},\n",
    "              {\"name\": \"Katie\", \"age\": 42, \"hobbies\": [\"diving\", \"art\"]}]\n",
    "}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "2100be51-62a9-4380-96af-ce5bf9d554a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "da90452a-a224-4ac6-9f4a-510c6e867e78",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = json.loads(obj)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "894d2c33-eb88-45c4-b47c-404fd4d3c6f2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'name': 'Wes',\n",
       " 'cities_lived': ['Akron', 'Nashville', 'New York', 'San Francisco'],\n",
       " 'pet': None,\n",
       " 'siblings': [{'name': 'Scott', 'age': 34, 'hobbies': ['guitars', 'soccer']},\n",
       "  {'name': 'Katie', 'age': 42, 'hobbies': ['diving', 'art']}]}"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "fa49e332-94fc-465a-880e-c238f366f6e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "asjson=json.dumps(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "f12192d3-e4c1-4844-84ba-bd2dc09b4c0e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"name\": \"Wes\", \"cities_lived\": [\"Akron\", \"Nashville\", \"New York\", \"San Francisco\"], \"pet\": null, \"siblings\": [{\"name\": \"Scott\", \"age\": 34, \"hobbies\": [\"guitars\", \"soccer\"]}, {\"name\": \"Katie\", \"age\": 42, \"hobbies\": [\"diving\", \"art\"]}]}'"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "asjson"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "941f30a6-5ff3-44bb-a383-659b563ed890",
   "metadata": {},
   "outputs": [],
   "source": [
    "siblings=pd.DataFrame(result['siblings'],columns=['name','age'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "9978cc39-2941-4f1d-9bbf-64cb87608d2b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Scott</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Katie</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    name  age\n",
       "0  Scott   34\n",
       "1  Katie   42"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "siblings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "3dc063ac-c46b-465d-8ad7-38a172b9a8af",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{\"a\": 1, \"b\": 2, \"c\": 3},\n",
      " {\"a\": 4, \"b\": 5, \"c\": 6},\n",
      " {\"a\": 7, \"b\": 8, \"c\": 9}]\n"
     ]
    }
   ],
   "source": [
    "printFile('examples/example.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "59fc0d30-60f3-4b40-9a03-a1d789cc24cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "data=pd.read_json('examples/example.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "eb07dc41-2bd9-43c3-a784-ee2006705743",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b  c\n",
       "0  1  2  3\n",
       "1  4  5  6\n",
       "2  7  8  9"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "19386869-f711-4add-9829-5a39b68ac613",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\"a\":{\"0\":1,\"1\":4,\"2\":7},\"b\":{\"0\":2,\"1\":5,\"2\":8},\"c\":{\"0\":3,\"1\":6,\"2\":9}}"
     ]
    }
   ],
   "source": [
    "data.to_json(sys.stdout)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "01fbaa02-11cb-4bde-bd2e-9463e638f6eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "tables = pd.read_html('examples/fdic_failed_bank_list.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "f0fb32cc-3092-431f-a59e-090c2d0f28f6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(tables)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "39d166fd-37b7-4445-971f-0c2b0aab0153",
   "metadata": {},
   "outputs": [],
   "source": [
    "failures = tables[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "1449cdf2-78ee-4b95-b1bf-bf13b9753182",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Bank Name</th>\n",
       "      <th>City</th>\n",
       "      <th>ST</th>\n",
       "      <th>CERT</th>\n",
       "      <th>Acquiring Institution</th>\n",
       "      <th>Closing Date</th>\n",
       "      <th>Updated Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Allied Bank</td>\n",
       "      <td>Mulberry</td>\n",
       "      <td>AR</td>\n",
       "      <td>91</td>\n",
       "      <td>Today's Bank</td>\n",
       "      <td>September 23, 2016</td>\n",
       "      <td>November 17, 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>The Woodbury Banking Company</td>\n",
       "      <td>Woodbury</td>\n",
       "      <td>GA</td>\n",
       "      <td>11297</td>\n",
       "      <td>United Bank</td>\n",
       "      <td>August 19, 2016</td>\n",
       "      <td>November 17, 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>First CornerStone Bank</td>\n",
       "      <td>King of Prussia</td>\n",
       "      <td>PA</td>\n",
       "      <td>35312</td>\n",
       "      <td>First-Citizens Bank &amp; Trust Company</td>\n",
       "      <td>May 6, 2016</td>\n",
       "      <td>September 6, 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Trust Company Bank</td>\n",
       "      <td>Memphis</td>\n",
       "      <td>TN</td>\n",
       "      <td>9956</td>\n",
       "      <td>The Bank of Fayette County</td>\n",
       "      <td>April 29, 2016</td>\n",
       "      <td>September 6, 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>North Milwaukee State Bank</td>\n",
       "      <td>Milwaukee</td>\n",
       "      <td>WI</td>\n",
       "      <td>20364</td>\n",
       "      <td>First-Citizens Bank &amp; Trust Company</td>\n",
       "      <td>March 11, 2016</td>\n",
       "      <td>June 16, 2016</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      Bank Name             City  ST   CERT  \\\n",
       "0                   Allied Bank         Mulberry  AR     91   \n",
       "1  The Woodbury Banking Company         Woodbury  GA  11297   \n",
       "2        First CornerStone Bank  King of Prussia  PA  35312   \n",
       "3            Trust Company Bank          Memphis  TN   9956   \n",
       "4    North Milwaukee State Bank        Milwaukee  WI  20364   \n",
       "\n",
       "                 Acquiring Institution        Closing Date       Updated Date  \n",
       "0                         Today's Bank  September 23, 2016  November 17, 2016  \n",
       "1                          United Bank     August 19, 2016  November 17, 2016  \n",
       "2  First-Citizens Bank & Trust Company         May 6, 2016  September 6, 2016  \n",
       "3           The Bank of Fayette County      April 29, 2016  September 6, 2016  \n",
       "4  First-Citizens Bank & Trust Company      March 11, 2016      June 16, 2016  "
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "failures.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "692d6832-fc44-409d-ad55-d6c5f4365e35",
   "metadata": {},
   "outputs": [],
   "source": [
    "close_timestamps=pd.to_datetime(failures['Closing Date'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "6345496d-7bf2-4560-897e-17cca6eb451c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Closing Date\n",
       "2010    157\n",
       "2009    140\n",
       "2011     92\n",
       "2012     51\n",
       "2008     25\n",
       "       ... \n",
       "2004      4\n",
       "2001      4\n",
       "2007      3\n",
       "2003      3\n",
       "2000      2\n",
       "Name: count, Length: 15, dtype: int64"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "close_timestamps.dt.year.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "9976b1df-6712-40d4-8715-d733bc99061b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from lxml import objectify"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "d15eb750-02b5-484c-bf8b-220675bbd91f",
   "metadata": {},
   "outputs": [],
   "source": [
    "path='datasets/mta_perf/Performance_MNR.xml'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "6bd1a658-76ae-4766-a4e6-b21a5f5c6d34",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(path) as f:\n",
    "    parsed = objectify.parse(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "eb349ff1-5bb5-4291-8f6c-4853b632a28f",
   "metadata": {},
   "outputs": [],
   "source": [
    "root =parsed.getroot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "ddee8374-7886-4c53-b69c-9f75bfb231c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "data=[]\n",
    "skip_fields=['PARENT_SEQ','INDICATOR_SEQ','DESIRED_CHANGE','DECIMAL_PLACES']\n",
    "for elt in root.INDICATOR:\n",
    "    el_data={}\n",
    "    for child in elt.getchildren():\n",
    "        if child.tag in skip_fields:\n",
    "            continue\n",
    "        el_data[child.tag] = child.pyval\n",
    "    data.append(el_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "7044709e-c4e4-4881-abab-d3a743cfb214",
   "metadata": {},
   "outputs": [],
   "source": [
    "pref=pd.DataFrame(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "id": "bf06f6d7-e24a-4ec0-8366-a723687c021e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AGENCY_NAME</th>\n",
       "      <th>INDICATOR_NAME</th>\n",
       "      <th>DESCRIPTION</th>\n",
       "      <th>PERIOD_YEAR</th>\n",
       "      <th>PERIOD_MONTH</th>\n",
       "      <th>CATEGORY</th>\n",
       "      <th>FREQUENCY</th>\n",
       "      <th>INDICATOR_UNIT</th>\n",
       "      <th>YTD_TARGET</th>\n",
       "      <th>YTD_ACTUAL</th>\n",
       "      <th>MONTHLY_TARGET</th>\n",
       "      <th>MONTHLY_ACTUAL</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Metro-North Railroad</td>\n",
       "      <td>On-Time Performance (West of Hudson)</td>\n",
       "      <td>Percent of commuter trains that arrive at thei...</td>\n",
       "      <td>2008</td>\n",
       "      <td>1</td>\n",
       "      <td>Service Indicators</td>\n",
       "      <td>M</td>\n",
       "      <td>%</td>\n",
       "      <td>95.0</td>\n",
       "      <td>96.9</td>\n",
       "      <td>95.0</td>\n",
       "      <td>96.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Metro-North Railroad</td>\n",
       "      <td>On-Time Performance (West of Hudson)</td>\n",
       "      <td>Percent of commuter trains that arrive at thei...</td>\n",
       "      <td>2008</td>\n",
       "      <td>2</td>\n",
       "      <td>Service Indicators</td>\n",
       "      <td>M</td>\n",
       "      <td>%</td>\n",
       "      <td>95.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>95.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Metro-North Railroad</td>\n",
       "      <td>On-Time Performance (West of Hudson)</td>\n",
       "      <td>Percent of commuter trains that arrive at thei...</td>\n",
       "      <td>2008</td>\n",
       "      <td>3</td>\n",
       "      <td>Service Indicators</td>\n",
       "      <td>M</td>\n",
       "      <td>%</td>\n",
       "      <td>95.0</td>\n",
       "      <td>96.3</td>\n",
       "      <td>95.0</td>\n",
       "      <td>96.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Metro-North Railroad</td>\n",
       "      <td>On-Time Performance (West of Hudson)</td>\n",
       "      <td>Percent of commuter trains that arrive at thei...</td>\n",
       "      <td>2008</td>\n",
       "      <td>4</td>\n",
       "      <td>Service Indicators</td>\n",
       "      <td>M</td>\n",
       "      <td>%</td>\n",
       "      <td>95.0</td>\n",
       "      <td>96.8</td>\n",
       "      <td>95.0</td>\n",
       "      <td>98.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Metro-North Railroad</td>\n",
       "      <td>On-Time Performance (West of Hudson)</td>\n",
       "      <td>Percent of commuter trains that arrive at thei...</td>\n",
       "      <td>2008</td>\n",
       "      <td>5</td>\n",
       "      <td>Service Indicators</td>\n",
       "      <td>M</td>\n",
       "      <td>%</td>\n",
       "      <td>95.0</td>\n",
       "      <td>96.6</td>\n",
       "      <td>95.0</td>\n",
       "      <td>95.8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            AGENCY_NAME                        INDICATOR_NAME  \\\n",
       "0  Metro-North Railroad  On-Time Performance (West of Hudson)   \n",
       "1  Metro-North Railroad  On-Time Performance (West of Hudson)   \n",
       "2  Metro-North Railroad  On-Time Performance (West of Hudson)   \n",
       "3  Metro-North Railroad  On-Time Performance (West of Hudson)   \n",
       "4  Metro-North Railroad  On-Time Performance (West of Hudson)   \n",
       "\n",
       "                                         DESCRIPTION  PERIOD_YEAR  \\\n",
       "0  Percent of commuter trains that arrive at thei...         2008   \n",
       "1  Percent of commuter trains that arrive at thei...         2008   \n",
       "2  Percent of commuter trains that arrive at thei...         2008   \n",
       "3  Percent of commuter trains that arrive at thei...         2008   \n",
       "4  Percent of commuter trains that arrive at thei...         2008   \n",
       "\n",
       "   PERIOD_MONTH            CATEGORY FREQUENCY INDICATOR_UNIT YTD_TARGET  \\\n",
       "0             1  Service Indicators         M              %       95.0   \n",
       "1             2  Service Indicators         M              %       95.0   \n",
       "2             3  Service Indicators         M              %       95.0   \n",
       "3             4  Service Indicators         M              %       95.0   \n",
       "4             5  Service Indicators         M              %       95.0   \n",
       "\n",
       "  YTD_ACTUAL MONTHLY_TARGET MONTHLY_ACTUAL  \n",
       "0       96.9           95.0           96.9  \n",
       "1       96.0           95.0           95.0  \n",
       "2       96.3           95.0           96.9  \n",
       "3       96.8           95.0           98.3  \n",
       "4       96.6           95.0           95.8  "
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pref.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "fc7f3459-3e37-4c5c-a61a-9cea1d2865d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "pref2 = pd.read_xml(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "61ff7d99-cd50-4bd8-92d7-bc799890a7d9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>INDICATOR_SEQ</th>\n",
       "      <th>PARENT_SEQ</th>\n",
       "      <th>AGENCY_NAME</th>\n",
       "      <th>INDICATOR_NAME</th>\n",
       "      <th>DESCRIPTION</th>\n",
       "      <th>PERIOD_YEAR</th>\n",
       "      <th>PERIOD_MONTH</th>\n",
       "      <th>CATEGORY</th>\n",
       "      <th>FREQUENCY</th>\n",
       "      <th>DESIRED_CHANGE</th>\n",
       "      <th>INDICATOR_UNIT</th>\n",
       "      <th>DECIMAL_PLACES</th>\n",
       "      <th>YTD_TARGET</th>\n",
       "      <th>YTD_ACTUAL</th>\n",
       "      <th>MONTHLY_TARGET</th>\n",
       "      <th>MONTHLY_ACTUAL</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>28445</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Metro-North Railroad</td>\n",
       "      <td>On-Time Performance (West of Hudson)</td>\n",
       "      <td>Percent of commuter trains that arrive at thei...</td>\n",
       "      <td>2008</td>\n",
       "      <td>1</td>\n",
       "      <td>Service Indicators</td>\n",
       "      <td>M</td>\n",
       "      <td>U</td>\n",
       "      <td>%</td>\n",
       "      <td>1</td>\n",
       "      <td>95.00</td>\n",
       "      <td>96.90</td>\n",
       "      <td>95.00</td>\n",
       "      <td>96.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>28445</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Metro-North Railroad</td>\n",
       "      <td>On-Time Performance (West of Hudson)</td>\n",
       "      <td>Percent of commuter trains that arrive at thei...</td>\n",
       "      <td>2008</td>\n",
       "      <td>2</td>\n",
       "      <td>Service Indicators</td>\n",
       "      <td>M</td>\n",
       "      <td>U</td>\n",
       "      <td>%</td>\n",
       "      <td>1</td>\n",
       "      <td>95.00</td>\n",
       "      <td>96.00</td>\n",
       "      <td>95.00</td>\n",
       "      <td>95.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>28445</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Metro-North Railroad</td>\n",
       "      <td>On-Time Performance (West of Hudson)</td>\n",
       "      <td>Percent of commuter trains that arrive at thei...</td>\n",
       "      <td>2008</td>\n",
       "      <td>3</td>\n",
       "      <td>Service Indicators</td>\n",
       "      <td>M</td>\n",
       "      <td>U</td>\n",
       "      <td>%</td>\n",
       "      <td>1</td>\n",
       "      <td>95.00</td>\n",
       "      <td>96.30</td>\n",
       "      <td>95.00</td>\n",
       "      <td>96.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>28445</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Metro-North Railroad</td>\n",
       "      <td>On-Time Performance (West of Hudson)</td>\n",
       "      <td>Percent of commuter trains that arrive at thei...</td>\n",
       "      <td>2008</td>\n",
       "      <td>4</td>\n",
       "      <td>Service Indicators</td>\n",
       "      <td>M</td>\n",
       "      <td>U</td>\n",
       "      <td>%</td>\n",
       "      <td>1</td>\n",
       "      <td>95.00</td>\n",
       "      <td>96.80</td>\n",
       "      <td>95.00</td>\n",
       "      <td>98.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28445</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Metro-North Railroad</td>\n",
       "      <td>On-Time Performance (West of Hudson)</td>\n",
       "      <td>Percent of commuter trains that arrive at thei...</td>\n",
       "      <td>2008</td>\n",
       "      <td>5</td>\n",
       "      <td>Service Indicators</td>\n",
       "      <td>M</td>\n",
       "      <td>U</td>\n",
       "      <td>%</td>\n",
       "      <td>1</td>\n",
       "      <td>95.00</td>\n",
       "      <td>96.60</td>\n",
       "      <td>95.00</td>\n",
       "      <td>95.80</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   INDICATOR_SEQ  PARENT_SEQ           AGENCY_NAME  \\\n",
       "0          28445         NaN  Metro-North Railroad   \n",
       "1          28445         NaN  Metro-North Railroad   \n",
       "2          28445         NaN  Metro-North Railroad   \n",
       "3          28445         NaN  Metro-North Railroad   \n",
       "4          28445         NaN  Metro-North Railroad   \n",
       "\n",
       "                         INDICATOR_NAME  \\\n",
       "0  On-Time Performance (West of Hudson)   \n",
       "1  On-Time Performance (West of Hudson)   \n",
       "2  On-Time Performance (West of Hudson)   \n",
       "3  On-Time Performance (West of Hudson)   \n",
       "4  On-Time Performance (West of Hudson)   \n",
       "\n",
       "                                         DESCRIPTION  PERIOD_YEAR  \\\n",
       "0  Percent of commuter trains that arrive at thei...         2008   \n",
       "1  Percent of commuter trains that arrive at thei...         2008   \n",
       "2  Percent of commuter trains that arrive at thei...         2008   \n",
       "3  Percent of commuter trains that arrive at thei...         2008   \n",
       "4  Percent of commuter trains that arrive at thei...         2008   \n",
       "\n",
       "   PERIOD_MONTH            CATEGORY FREQUENCY DESIRED_CHANGE INDICATOR_UNIT  \\\n",
       "0             1  Service Indicators         M              U              %   \n",
       "1             2  Service Indicators         M              U              %   \n",
       "2             3  Service Indicators         M              U              %   \n",
       "3             4  Service Indicators         M              U              %   \n",
       "4             5  Service Indicators         M              U              %   \n",
       "\n",
       "   DECIMAL_PLACES YTD_TARGET YTD_ACTUAL MONTHLY_TARGET MONTHLY_ACTUAL  \n",
       "0               1      95.00      96.90          95.00          96.90  \n",
       "1               1      95.00      96.00          95.00          95.00  \n",
       "2               1      95.00      96.30          95.00          96.90  \n",
       "3               1      95.00      96.80          95.00          98.30  \n",
       "4               1      95.00      96.60          95.00          95.80  "
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pref2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "a2fb221d-6cf2-4355-8172-b62fc4c8173d",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.read_csv('examples/ex1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "28585880-732f-43d4-887e-d5f4446b5e9d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "914c730b-264f-4614-a157-3d61d2aa595d",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.to_pickle('examples/frame_pickle')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "id": "237bd6e3-a233-4cdc-8988-4b3bb4b747f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_pickle('examples/frame_pickle')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "6dee48ab-a06e-4583-8aa3-01f9a9241041",
   "metadata": {},
   "outputs": [],
   "source": [
    "fec = pd.read_parquet('datasets/fec/fec.parquet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "9eb0f8fd-cc0d-4764-a8d9-cc956c6a788c",
   "metadata": {},
   "outputs": [],
   "source": [
    "xlsx=pd.ExcelFile('examples/ex1.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "0a5f6a6d-d58f-4134-9020-a643c1b1da50",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Sheet1']"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xlsx.sheet_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "8679bdfa-b0c8-43b9-b902-26d2f9618aaf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0  a   b   c   d message\n",
       "0           0  1   2   3   4   hello\n",
       "1           1  5   6   7   8   world\n",
       "2           2  9  10  11  12     foo"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xlsx.parse(sheet_name='Sheet1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "6558b5de-8027-4ebd-a7ae-3c73a64f0403",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xlsx.parse(sheet_name='Sheet1',index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "id": "7b724b72-424a-4949-a57c-74c77a3806a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame=pd.read_excel('examples/ex1.xlsx',sheet_name='Sheet1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "id": "cd19d6d1-fce9-4c02-a3c6-1261db233a0a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0  a   b   c   d message\n",
       "0           0  1   2   3   4   hello\n",
       "1           1  5   6   7   8   world\n",
       "2           2  9  10  11  12     foo"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "id": "6ca5a9aa-1687-4d25-ba73-124b2e98f6ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "writer= pd.ExcelWriter('examples/ex2.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "ce627951-f292-4a63-80c2-a7c7011f4d63",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.to_excel(writer,'Sheet1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "id": "3ff6bc11-a24a-4f78-b74b-849185684125",
   "metadata": {},
   "outputs": [],
   "source": [
    "writer.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "0aa888fe-c275-4d66-9bf2-f770e126fd29",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.to_excel('examples/ex2.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "a576f8ec-56ec-470e-832c-b206c82ec1f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.DataFrame({'a':np.random.standard_normal(100)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "2317ac9d-0c71-46a6-82c6-0e90d5bb8525",
   "metadata": {},
   "outputs": [],
   "source": [
    "store = pd.HDFStore('examples/mydata.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "92b9c894-46e2-4132-93fe-ccf3b43f0e04",
   "metadata": {},
   "outputs": [],
   "source": [
    "store['obj1'] = frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "id": "9f295b47-bbfe-43b5-9b24-8700697a2437",
   "metadata": {},
   "outputs": [],
   "source": [
    "store['obj1_col'] = frame['a']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "id": "c2423769-7804-4e61-b551-70ef136e0408",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<class 'pandas.io.pytables.HDFStore'>\n",
       "File path: examples/mydata.h5"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "id": "9eda8875-698a-462b-b603-f4cc41114d5b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.898085</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.217748</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.464943</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.968821</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.642717</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>-0.745882</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>0.339117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>-0.619380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>0.085446</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>0.572783</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           a\n",
       "0  -0.898085\n",
       "1   0.217748\n",
       "2   0.464943\n",
       "3   0.968821\n",
       "4   1.642717\n",
       "..       ...\n",
       "95 -0.745882\n",
       "96  0.339117\n",
       "97 -0.619380\n",
       "98  0.085446\n",
       "99  0.572783\n",
       "\n",
       "[100 rows x 1 columns]"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "store['obj1']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "id": "3ba54a69-8187-4e68-aba1-9379ebb6d7ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "store.put('obj2',frame,format='table')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "id": "ac3d80b8-893c-46db-a9c2-23ee7be33532",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0.194357</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0.591869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>0.353125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>-0.358327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1.490823</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>-1.450587</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           a\n",
       "10  0.194357\n",
       "11  0.591869\n",
       "12  0.353125\n",
       "13 -0.358327\n",
       "14  1.490823\n",
       "15 -1.450587"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "store.select('obj2',where=['index>=10 and index<=15'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "d023b568-5d0f-4ce7-b49e-4ee64c99b92c",
   "metadata": {},
   "outputs": [],
   "source": [
    "store.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "id": "6a667b9e-73c1-48fe-bb3d-8e18fbd208d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.to_hdf('examples/mydata.h5','obj3',format='table')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "0ea2e40b-601d-41c0-98c5-706d7d83dc52",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.898085</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.217748</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.464943</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.968821</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.642717</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a\n",
       "0 -0.898085\n",
       "1  0.217748\n",
       "2  0.464943\n",
       "3  0.968821\n",
       "4  1.642717"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_hdf('examples/mydata.h5','obj3',where=['index<5'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "id": "d36d13e5-ec22-4c5a-bfac-441d8303ab44",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "id": "575798c9-3a7c-4ce0-b1d4-db9dca0e224b",
   "metadata": {},
   "outputs": [],
   "source": [
    "os.remove('examples/mydata.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "id": "46d1d489-2a43-4321-aae0-96712fc79f42",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "id": "4a1d9c2f-c83f-4bbb-b617-86dda760484d",
   "metadata": {},
   "outputs": [],
   "source": [
    "url ='https://api.github.com/repos/pandas-dev/pandas/issues'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "id": "15cfdb1b-0206-4f7c-96d0-2cb3e0e3565d",
   "metadata": {},
   "outputs": [],
   "source": [
    "resp = requests.get(url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "id": "b3557342-ecd2-43b7-b424-de4fc91707e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "resp.raise_for_status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "id": "90ffc2b1-f023-4561-850f-ffa259230d1f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Response [200]>"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "id": "9e12966f-4ea5-42bc-822c-22a51afdb9c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "data=resp.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "id": "1dc9d7e9-f6a2-4da2-8e4d-31d11e85e29c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Avoid floating points for integral floordiv for pyarrow types'"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0]['title']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "id": "4f66c7b6-e3c9-49c9-8cb5-f2d2cfd4cea8",
   "metadata": {},
   "outputs": [],
   "source": [
    "issues=pd.DataFrame(data,columns=['number','title','labels','state'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "id": "f6cfe237-f5d5-4b09-8cbc-ca07bd1e08fc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>number</th>\n",
       "      <th>title</th>\n",
       "      <th>labels</th>\n",
       "      <th>state</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>56677</td>\n",
       "      <td>Avoid floating points for integral floordiv fo...</td>\n",
       "      <td>[]</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>56676</td>\n",
       "      <td>BUG: Floordiv with integral pyarrow types shou...</td>\n",
       "      <td>[{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>56675</td>\n",
       "      <td>TYP: more misc annotations</td>\n",
       "      <td>[]</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>56674</td>\n",
       "      <td>BUG: inconsistant `MultiIndex` behavior in `.loc`</td>\n",
       "      <td>[{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>56673</td>\n",
       "      <td>BUG: Can't cast pyarrow floats to ints</td>\n",
       "      <td>[{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>56605</td>\n",
       "      <td>ENH: Add alternative html for Styler tooltips</td>\n",
       "      <td>[{'id': 76812, 'node_id': 'MDU6TGFiZWw3NjgxMg=...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>56600</td>\n",
       "      <td>BUG: incompatible dtype when creating bool col...</td>\n",
       "      <td>[{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>56599</td>\n",
       "      <td>BUG: DataFrame.replace with regex on `StringDt...</td>\n",
       "      <td>[{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>56598</td>\n",
       "      <td>BUG: Fix inconsistency when constructing a Ser...</td>\n",
       "      <td>[]</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>56596</td>\n",
       "      <td>DEPR: List of deprecations to be removed in 4.0</td>\n",
       "      <td>[]</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>30 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    number                                              title  \\\n",
       "0    56677  Avoid floating points for integral floordiv fo...   \n",
       "1    56676  BUG: Floordiv with integral pyarrow types shou...   \n",
       "2    56675                         TYP: more misc annotations   \n",
       "3    56674  BUG: inconsistant `MultiIndex` behavior in `.loc`   \n",
       "4    56673             BUG: Can't cast pyarrow floats to ints   \n",
       "..     ...                                                ...   \n",
       "25   56605      ENH: Add alternative html for Styler tooltips   \n",
       "26   56600  BUG: incompatible dtype when creating bool col...   \n",
       "27   56599  BUG: DataFrame.replace with regex on `StringDt...   \n",
       "28   56598  BUG: Fix inconsistency when constructing a Ser...   \n",
       "29   56596    DEPR: List of deprecations to be removed in 4.0   \n",
       "\n",
       "                                               labels state  \n",
       "0                                                  []  open  \n",
       "1   [{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...  open  \n",
       "2                                                  []  open  \n",
       "3   [{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...  open  \n",
       "4   [{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...  open  \n",
       "..                                                ...   ...  \n",
       "25  [{'id': 76812, 'node_id': 'MDU6TGFiZWw3NjgxMg=...  open  \n",
       "26  [{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...  open  \n",
       "27  [{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...  open  \n",
       "28                                                 []  open  \n",
       "29                                                 []  open  \n",
       "\n",
       "[30 rows x 4 columns]"
      ]
     },
     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "issues"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "id": "a82fc316-4dfb-4f1e-bc27-41c4cdc2721c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sqlite3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "id": "44a2d6b7-5716-4944-a0bc-75028d05d406",
   "metadata": {},
   "outputs": [],
   "source": [
    "query=\"\"\"\n",
    "    CREATE TABLE test (\n",
    "        a VARCHAR(20),\n",
    "        b VARCHAR(20),\n",
    "        c REAl,\n",
    "        d INTEGER\n",
    "    );\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "id": "c77d17e3-b81c-4d7f-a4ea-fff37be9b7a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "con = sqlite3.connect('mydata.sqlite')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "id": "4a4a1173-6646-4151-b6b8-05e63614d12d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<sqlite3.Cursor at 0x1b863ac50a0>"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "con.execute(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "id": "78b1942a-c8a7-4519-8e56-24806dae9222",
   "metadata": {},
   "outputs": [],
   "source": [
    "con.commit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "id": "87628e99-1eff-4182-a04a-4bbffde9b84e",
   "metadata": {},
   "outputs": [],
   "source": [
    "data=[('Atlanta','Georgia',1.25,6),('Tallahassee','Florida',2.6,3),('Sacramento','California',1.7,5)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "id": "88a2938a-69f2-4e07-8f07-4ca28c8557e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "stmt='INSERT INTO test VALUES(?,?,?,?)'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "id": "1adb25e3-5da9-4013-a6fe-1502e2538ffd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<sqlite3.Cursor at 0x1b8639d9650>"
      ]
     },
     "execution_count": 155,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "con.executemany(stmt,data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "id": "47b7e676-3799-47d9-974c-7825e5f5845a",
   "metadata": {},
   "outputs": [],
   "source": [
    "con.commit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "id": "b8886e17-a972-471d-b6ff-209967405f1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "cursor = con.execute('SELECT * FROM test')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "id": "78ebcae1-76ad-4a76-9c90-870f423e4567",
   "metadata": {},
   "outputs": [],
   "source": [
    "rows =cursor.fetchall()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "id": "fa71fe03-7110-482b-b0c4-86aa5c2a7f60",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Atlanta', 'Georgia', 1.25, 6),\n",
       " ('Tallahassee', 'Florida', 2.6, 3),\n",
       " ('Sacramento', 'California', 1.7, 5)]"
      ]
     },
     "execution_count": 159,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "id": "059a01a3-4cc4-4c3d-9e7e-d687ce36aa8f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(('a', None, None, None, None, None, None),\n",
       " ('b', None, None, None, None, None, None),\n",
       " ('c', None, None, None, None, None, None),\n",
       " ('d', None, None, None, None, None, None))"
      ]
     },
     "execution_count": 160,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cursor.description"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "id": "a6604a83-37a2-4fe8-80e2-3af04bafeb21",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Atlanta</td>\n",
       "      <td>Georgia</td>\n",
       "      <td>1.25</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Tallahassee</td>\n",
       "      <td>Florida</td>\n",
       "      <td>2.60</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sacramento</td>\n",
       "      <td>California</td>\n",
       "      <td>1.70</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             a           b     c  d\n",
       "0      Atlanta     Georgia  1.25  6\n",
       "1  Tallahassee     Florida  2.60  3\n",
       "2   Sacramento  California  1.70  5"
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(rows,columns=[x[0] for x in cursor.description])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "4cf5ca55-7164-42fc-9992-ee35d00011b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sqlalchemy as sqla"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "id": "d5a96211-b1c3-4b7b-b7c9-000e1a7d7629",
   "metadata": {},
   "outputs": [],
   "source": [
    "db = sqla.create_engine('sqlite:///mydata.sqlite')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "453ade75-2a7b-4fba-ba8c-7cbb1bb246ca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Atlanta</td>\n",
       "      <td>Georgia</td>\n",
       "      <td>1.25</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Tallahassee</td>\n",
       "      <td>Florida</td>\n",
       "      <td>2.60</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sacramento</td>\n",
       "      <td>California</td>\n",
       "      <td>1.70</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             a           b     c  d\n",
       "0      Atlanta     Georgia  1.25  6\n",
       "1  Tallahassee     Florida  2.60  3\n",
       "2   Sacramento  California  1.70  5"
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_sql('SELECT * FROM test',db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8101812e-5fa0-4a00-8075-969424ae2091",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
